/* This file assembles county-level characteristics from multiple different sources, 
along with some state-level characteristics, and cleans and merges the data into 
a county-year level file of location characteristics
**********************************************************************************/

**********************************************************************************
*** prepare county level land area ***
**********************************************************************************
import delimited using "$datadir/raw/DEC_10_SF1_GCTPH1.US05PR_with_ann.csv", clear varname(2)
split geographicarea, p(" - ")

keep if geographicarea1 == "United States"
keep geographicarea2 geographicarea3 area* targetgeoid2 population
ren areainsquaremiles* *
ren target fipscounty

drop in 1
bysort geographicarea2: g count = _N
tab count
drop count

drop if mi(geographicarea3)

* manual adjustment: 
* 1. add skagway and hoonah-angon together = 2232
* 2. add wrangell and petersburg together = 2200
* 3. wade hampton area is recently renamed to Ketchikan, so add it and prince of wales together = 2201
replace fipscounty = 2232 if inlist(fipscounty, 2230, 2015)
replace fipscounty = 2280 if inlist(fipscounty, 2275, 2195)
replace fipscounty = 2201 if inlist(fipscounty, 2270, 2198)

collapse (sum) landarea, by(fipscounty) 

save "$temp/landarea", replace

********************************************************************************
*** prepare airport data (FAA) ***
**********************************************************************************
import excel using "$datadir/raw/airports.xlsx", firstrow clear case(l)

rename airport airport_name
g airport =1 
g airport_small=(role=="P-S")
g airport_med=(role=="P-M")
g airport_large=(role=="P-L")

split city, parse("/")
rename city orig_city 

reshape long city, i(airport* state enplanements role)
drop if city==""
drop if regexm(city,"Washington, D.C.")
replace city="Tacoma" if regexm(city, "Tacoma")
replace city="Arlington" if regexm(city, "Arlington")
replace city="Boise City" if city=="Boise"
replace city="Arlington" if regexm(city,"Chantilly")
replace city="Seattle" if regexm(city, "Seattle")

replace city=strtrim(city)
replace state=strtrim(state)

collapse (sum) airport airport_small airport_med airport_large enplanements, by(city state)

drop if enplanements<100000

save "$temp/city_air", replace

*** process airport data to the county level ***
insheet using "$datadir/crosswalk/cbsa2fipscty.csv", clear
drop if fipsstate==.

split cbsatitle, parse(",") gen(city)

rename city1 city
rename city2 state

split city, parse("-")
rename city orig_city

reshape long city, i(cbsatitle county statename fips*)
drop if city==""

replace city=strtrim(city)
replace state=strtrim(state)	

rename state state_cbsa
rename fipsstate fips

statastates, name(statename) 
keep if _m==3 
drop _m state_fips 

rename state_abbrev state 
keep city state fips fipscounty countycounty orig_city
duplicates drop

merge m:1 city state using  "$temp/city_air" 

replace fipscounty = 37 if _m == 2 & city == "Burbank"
replace fipscounty = 117 if _m == 2 & city == "Covington"
replace fipscounty = 103 if _m == 2 & city == "Islip"
replace fipscounty = 111 if _m == 2 & city == "Louisville"
replace fipscounty = 59 if _m == 2 & city == "Orange County"
replace fipscounty = 65 if _m == 2 & city == "Palm Springs"
replace fipscounty = 5 if _m == 2 & city == "Panama City Beach"
replace fipscounty = 119 if _m == 2 & city == "White Plains"
replace fipscounty = 107 if _m == 2 & city == "Dulles "

keep state fipscounty airport-enplanements
duplicates drop //get rid of double counting for counties that have multiple cities linked to the same airport

collapse (sum) airport airport_small airport_med airport_large enplanements, by(state fipscounty)

g airport_any=(airport_large>0)
replace airport_any=1 if airport_med>0

keep airport_any airport_large state fipscounty 
rename fipscounty countyfip 
rename state stateabbrev
save "$temp/airport", replace

********************************************************************************
*** prepare university data (NSF) ***
**********************************************************************************
u $datadir/raw/university_RD, clear 

egen r = rowtotal(*fund) // total r&d funding 
replace univ_name = lower(trim(stritrim(univ_name)))
save "$temp/rd", replace

u $datadir/raw/university_location, clear
replace univ_name = subinstr(univ_name,", All Campuses","",.)
replace univ_name = lower(trim(stritrim(univ_name)))
joinby univ_name using "$temp/rd", unmatched(none)

keep univ_name fipscty countyname fipstate r year

collapse (max) r , by(fips* univ_name)
g n_univ = 1
g top_RD = (r>500000)

collapse (sum) top_RD n_univ, by(fips*)

label var top_RD "top research university"
label var n_univ "number of universities in county"

g fips = fipstate*1000 + fipscty 
rename fipstate statefip 
drop fipscty 
save $temp/university, replace

***************************************************************************
*** prepare road network data (EPA) ***
*Smart Location Road Network 
*URL: https://www.epa.gov/smartgrowth/smart-location-mapping
****************************************************************************
insheet using $datadir/raw/roadnetwork.txt, comma clear 
*this is much more disaggregate than county , will average road network over county 
g total_roadnetwork = d3a 
g auto_roadnetwork = d3aao
collapse (mean) *_roadnetwork , by(statefp countyfp)
rename statefp statefip 
rename countyfp countyfip 
g fips = statefip*1000 + countyfip 
label var total_roadnetwork "Total road network density"
label var auto_roadnetwork "Network density in terms of facility miles of auto-oriented links per square mile"

drop if statefip>56
drop countyfip 
sa $temp/network, replace 

***************************************************************************
*** prepare infrastructure data (BTS) ***
*County Transportation Profiles (Bureau of Transportation Statistics)
* URL: https://www.bts.gov/ctp
****************************************************************************
insheet using $datadir/raw/County_Transportation_Profiles.csv, comma clear
drop numberofbusiness-numberofresidents 

rename countyfips fips 
keep fips countyname state* ofpoorcondition primaryandcommercial ofmedium routemilesoffreight 
rename statefips statefip 
sa $temp/transport, replace 

keep countyname statefip fips 
sa $temp/county, replace 

***************************************************************************
*** prepare zoning data (Wharton) ***
****************************************************************************
u $datadir/raw/whartonlanduse2020, clear
keep statecode state county* cbsa* q11c q11d 

rename *18 *

split countyname, parse(",")
drop countyname 
g id=_n 

reshape long countyname, i(id statecode state countycode cbsa* q11c q11d)
drop if countyname==""
drop _j 
replace countyname=strtrim(countyname)
sort statecode countyname countycode

rename statecode statefip
merge m:1 statefip countyname using $temp/county , keep(1 3)
replace fips = 51515 if _m==1 & countyname == "Bedford city"
drop _m 
drop if fips==. 
 
 collapse (mean) q11c q11d, by(statefip fips state)
 
 rename q11c comm_zoning 
 label var comm_ "perceive zoned land supply, commercial, 1=not zoned 2=far more 6=far less"
 rename q11d indus_zoning
 label var indus_ "perceive zoned land supply, industrial, 1=not zoned 2=far more 6=far less"
 sa $temp/zoning, replace

****************************************************************************
*** prepare distance to border data (Holmes) ***
*URL: http://users.econ.umn.edu/~holmes/data/BorderData.html
****************************************************************************
insheet using "$datadir/raw/county to state border.txt", clear
rename v2 bordername
rename v5 statefip 
drop v1 v3 v4 
rename v6 countyfip
rename v7 mindist 
drop v8 v9 

sort statefip countyfip 
collapse (min) mindist, by(statefip countyfip)
label var mindist "minimum distance to state border"
g fips = statefip*1000 + countyfip 

sa $temp/borders, replace 


****************************************************************************
*** prepare housing price data (Zillow) ***
****************************************************************************
insheet using "$datadir/raw/County_Zhvi_AllHomes.csv", comma clear
drop v8-v52 //dropping 1996-2000
reshape long v, i(regionid regionname state metro statecode municipal size)
by regionid: g n=_n
drop _j 
g y=floor(n/12.01) 
g year=y+2000

collapse (mean) v, by(regionid regionname state metro statecode municipal year)
rename statecodefips statefip
rename muni fipscty
keep statefip fipscty v year 
rename v zhvi 
replace zhvi=zhvi/1000

rename fipscty countyfip

sa $temp/zhvi, replace 


****************************************************************************
*** prepare state level variables ***
****************************************************************************
* state tax rates (book of states), from JEP 
use $datadir/raw/taxrates_stateyear_1950_2017.dta, clear
keep year fips stateabbrev corprate pinc* sales

* Right-to-work states
*source : https://www.ncsl.org/labor-and-employment/right-to-work-resources
g r2w=inlist(statea, "AL", "AZ", "AR", "FL", "GA", "ID")
replace r2w=1 if inlist(statea, "IA", "KS", "LA", "MS", "NE", "NV")
replace r2w=1 if inlist(statea, "NC" "ND", "OK", "SC", "SD", "TN")
replace r2w=1 if inlist(statea, "TX", "UT", "VA", "WY")

replace r2w=1 if inlist(statea,"MI","IN") & year>=2012
replace r2w=1 if statea=="WI" & year>=2015
replace r2w=1 if statea=="WV" & year>=2016
replace r2w=1 if inlist(statea, "KY", "MO") & year>=2017
// https://www.abc.org/News-Media/Newsline/missouri-becomes-28th-right-to-work-stateaspx MO became state in 2017 and repealed in 2018
label var r2w "right-to-work state"

keep if inrange(year, 2001, 2017)

* Term limited governors (FTM)
merge 1:1 stateabbrev year using $datadir/raw/gov_chars_20012018.dta, keep(1 3) nogen keepusing(term_limit)

statastates, abbrev(stateabbrev)
drop _m 
drop if stateabb=="DC"
rename state_name statename 
replace statename = proper(statename)

* Electric prices (USEIA)
merge 1:1 statename year using $datadir/raw/electric_prices, keep(1 3) nogen
format statename %20s

*price missing in 2009 in IL, use average between 2010 and 2011
sort stateabb year 
foreach v in e_comm e_ind { 
		by stateabb: replace `v' = (`v'[_n-1] + `v'[_n+1])/2 if `v'==. & year==2009
}

rename fips statefip 
sa $temp/state_level, replace 

**************************************************************************
*** ASSEMBLE ALL THE ABOVE DATA SETS 
**************************************************************************
**** population, personal income

u "$datadir/raw/bea_county_stats", clear

**** merge unemployment rate, employment level 
merge 1:1 fipscounty year using "$datadir/raw/bls_county_unemp", nogen 
drop if inlist(stateabbrev, "AK", "HI", "DC") //only using continental states 
keep if inrange(year, 2001,2017)

**** merge in density (not annual ) 
merge m:1 fipscounty using "$temp/landarea", keep(3) nogen  //missing land area for one county in SD 

g density = pop/landarea 
g epop = emp/pop 

rename fips statefip
rename fipscounty fips 
g countyfip = fips - statefip*1000 
sort year statefip countyfip

**** merge in zoning 
merge m:1 statefip fips using $temp/zoning, nogen keep(1 3)
drop state

**** merge in airport
merge m:1 stateabb countyfip using $temp/airport, nogen keep(1 3)

**** merge in infrastructure 
merge m:1 statefip fips using $temp/transport, nogen keep(1 3) 
drop *name 

**** merge in auto network 
merge m:1 statefip fips using $temp/network, nogen  keep(1 3)

**** merge in borders
merge m:1 statefip fips using $temp/borders, nogen  keep(1 3)

**** merge in university vars 
merge m:1 statefip fips using $temp/university, nogen keep(1 3)

**** merge in census data 
merge 1:1 statefip fips year using $datadir/census_county_level, nogen keep(1 3)

**** merge in housing 
merge 1:1 statefip countyfip year using $temp/zhvi, nogen keep(1 3)

**** merge in state-level data 
merge m:1 statefip stateabbrev year using $temp/state_level, nogen keep(1 3)

save "$datadir/location_char.dta", replace

